# wrapper for estimation window function
estimation_window <- function(data, i, calendar, event_date, 
                              estim_beg, estim_end, event_end, 
                              estim_LASSO = TRUE, LASSO_rmall_NAs = FALSE, 
                              LASSO_first_col,
                              seed, n_folds = 5){
  
  # code 'not-in' operator
  `%!in%` <- Negate(`%in%`)
  
  # Prepare windows ---- 
  estim_beg_date <- offset(event_date, n = estim_beg, calendar) 
  estim_end_date <- offset(event_date, n = estim_end, calendar) 
  event_end_date <- offset(event_date, n = event_end, calendar)
  
  # prepare working data; keep only the two windows of interest and necessary variables:
  dat <- data %>%
    dplyr::filter(ticker_symbol == i) %>%
    dplyr::mutate(estimwindow = (date >= estim_beg_date &
                                   date <= estim_end_date) %>% as.numeric(),
                  eventwindow = (date > estim_end_date &
                                   date <= event_end_date) %>% as.numeric()) %>%
    dplyr::filter(estimwindow == 1 |
                    eventwindow == 1) %>%
    dplyr::select(ticker_symbol, date, chg, estimwindow, eventwindow, all_of(LASSO_first_col):ncol(.))
  
  if (estim_LASSO){
    library(glmnet, quietly = TRUE)
    
    # prepare lasso training data:
    lasso <- dat %>%
      dplyr::select(date, all_of(LASSO_first_col):ncol(.)) %>%
      # remove the aggregate index for the S&P 500:
      dplyr::select(-"SP500")
    
    # drop indexes with too many missing values for the lasso:
    cols <- colnames(lasso)
    out <- map(.x = cols,
               .f = function(x) sum(is.na(lasso[,x]))) %>% 
      unlist()
    
    # we drop indexes that have even just *one* NA. Alternatively, you can consider the median of NAs:
    if (LASSO_rmall_NAs){
      drop <- which(out > 0)
    } else {
      drop <- which(out > median(out))
    }
    
    keep <- 1:ncol(lasso) %!in% drop
    lasso <- lasso[,keep]
    
    # remove date from lasso (not needed for the model)
    lasso <- lasso %>%
      dplyr::select(-date)
    
    # drop the index that is the very firm to be regressed, if present
    if (i %in% colnames(lasso)){
      lasso <- lasso %>%
        dplyr::select(-all_of(i))
    }
    
    # initialize results dataframes:
    estim_results <- NULL
    event_results <- NULL
    
    # and an error vector:
    ID_errors <- NULL
    
    # separate estimation data
    estim_win <- dat %>%
      dplyr::filter(estimwindow == 1)
    
    # trycatch the model
    tryCatch({ 
      set.seed(seed) # seed for replication
      # LASSO market models, separate x matrix and y vector; we need to drop NAs manually, lasso won't do it for us:
      x <- estim_win %>%
        dplyr::ungroup() %>%
        dplyr::select(all_of(colnames(lasso)), chg) %>% 
        na.omit() %>%
        dplyr::select(-chg) %>% as.matrix()
      y <- estim_win %>%
        dplyr::ungroup() %>%
        dplyr::select(all_of(colnames(lasso)), chg) %>% 
        na.omit() %>%
        dplyr::select(chg) %>% as.matrix()
      
      # run the model with specified number of CV folds:
      lasso_cv <- cv.glmnet(x, y, family = "gaussian", nfolds = n_folds)
      best_lambda <- lasso_cv$lambda.min # select best lambda
      # get coefficients of best model based on best lambda
      best_model <- glmnet(x, y, alpha = 1, lambda = best_lambda)
      
      # save the betas to store them in the output:
      betas <- best_model$beta %>% 
        as.matrix() %>%
        as.data.frame() %>%
        tibble::rownames_to_column("index") %>%
        tibble::as_tibble() %>%
        dplyr::rename("beta" = "s0")
      
      # find R-squared of model on training data. Do so manually:
      y_predicted <- predict(best_model, s = best_lambda, newx = x)
      
      # compute R2:
      sst <- sum((y - mean(y))^2)
      sse <- sum((y_predicted - y)^2)
      r2 <- 1 - sse/sst

      # now predict returns based on the model in *both* windows
      dat$fitted_LASSO <- predict(best_model, 
                                  s = best_lambda, 
                                  newx = dat %>%
                                    dplyr::ungroup() %>%
                                    dplyr::select(all_of(colnames(lasso))) %>%
                                    as.matrix()) %>%
        as.numeric()
        
      # compute abnormal returns as differences:
      dat$abnormal_LASSO <- dat$chg - dat$fitted_LASSO
      dat$abnormal_LASSO <- as.numeric(dat$abnormal_LASSO)
      
      # store results
      results <- data.frame(ticker = i,
                            R2 = r2)
      estim_results <- rbind(estim_results, results)
      
      # add to dat
      results <- dat %>% 
        dplyr::select(ticker_symbol, date, estimwindow, eventwindow, chg, fitted_LASSO, abnormal_LASSO) %>%
        dplyr::arrange(date)
      event_results <- rbind(event_results, results)
      
    }, 
    error = function(x){
      ID_errors <<- c(ID_errors, i) # parent environment assignment (need to "go out" of function environment)
      return(ID_errors)
    })
  } else {
    # separate estimation window data:
    ols <- dat %>%
      dplyr::filter(estimwindow == 1) %>%
      dplyr::select(date, "SP500", "chg")
    
    # initialize results dataframes:
    estim_results <- NULL
    event_results <- NULL
    
    # and an error vector:
    ID_errors <- NULL
    
    # trycatch the model
    tryCatch({ 
      
      ols_mod <- lm(chg ~ SP500, data = dat)

      # save beta:
      betas <- data.frame(index = "SP500",
                          beta = summary(ols_mod)$coefficients["SP500","Estimate"])
      
      # find R-squared of model on training data
      r2 <- summary(ols_mod)$r.squared

      # save model predicted returns on *both* windows, then compute abnormal returns:
      dat$fitted_OLS <- predict(ols_mod, newdata = data.frame(SP500 = dat$SP500))
      dat$abnormal_OLS <- dat$chg - dat$fitted_OLS
      dat$abnormal_OLS <- as.numeric(dat$abnormal_OLS)
      
      # store results
      results <- data.frame(ticker = i,
                            R2 = r2)
      estim_results <- rbind(estim_results, results)
      
      results <- dat %>% 
        dplyr::select(ticker_symbol, date, estimwindow, eventwindow, chg, fitted_OLS, abnormal_OLS) %>%
        dplyr::arrange(date)
      event_results <- rbind(event_results, results)
      
    }, 
    error = function(x){
      ID_errors <<- c(ID_errors, i) # parent environment assignment (need to "go out" of function environment)
      return(ID_errors)
    })
  }
  
  if (!"betas" %in% ls()){
    betas <- NULL
  }
  
  # export:
  out <- list("fit" = estim_results, 
              "counterfactuals" = event_results, 
              "errors" = ID_errors,
              "betas" = betas)
  return(out)
}